notebook.community

Edit and run



In [1]:

    
from time import process_time
from collections import namedtuple
from scipy import stats
from numpy.random import randint
import matplotlib.pyplot as plt

%matplotlib inline



In [2]:

    
def get_sample(sample_max_size = 2 ** 32, 
               random_size = True, element_range=(0, 2 ** 16), value_type=int):
    """
    Generate a pseudo-random sample of size `sample_max_size` if `random_size`
    is False, otherwise a pseudo-random size from 0 to `sample_max_size` -1
    Each element of the sample falls in the range defined by the `element_range`
    param and belongs to the type `value_type`
    """
    
    
    sample_size = randint(0, sample_max_size) if random_size else sample_max_size
    return randint(low=element_range[0], size=sample_size,
                   high=element_range[1], dtype=value_type)



In [3]:

    
def clf_function(a, b):
    return a > b



In [4]:

    
def bubble_sort(elements, clf_function):
    
    length = len(elements)
    for i in range(length):
        swapped = False
        for j in range(length - i -1):
            if clf_function(elements[j], elements[j+1]):
                elements[j], elements[j+1] = elements[j+1], elements[j]
                swapped = True
        if not swapped:
            break
    return elements



In [5]:

    
def performance_test(sample, clf_function):
    built_in_time_0 = process_time()
    built_in_sorted_list = sorted(sample)
    built_in_delta = process_time() - built_in_time_0

    comparison_time_0 = process_time()
    comparison_sorted_list = bubble_sort(sample, clf_function)
    comparison_delta = process_time() - comparison_time_0
    
    Result_test = namedtuple('Test', 'elements, delta_time')
    built_in = Result_test(built_in_sorted_list, built_in_delta)
    cmp = Result_test(comparison_sorted_list, comparison_delta)
    
    return (built_in, cmp)



In [6]:

    
def effectiveness_test(pattern, sample, quiet_mode=False):
    if any(pattern != sample):
        for p, s in zip(pattern, sample):
            if p != s and not quiet_mode:
                print("{}/{}".format(p, s))
        return False
    else:
        if not quiet_mode:
            print("Success")
        return True



In [7]:

    
def run_tests(algorithm, clf_function, sample_size=128, test_size=1024):
    
    errors = 0
    built_in_time = []
    comparison_time = []

    for test in range(test_size):
        sample = get_sample(sample_size, random_size=False)
        pattern, sample = performance_test(sample, clf_function)
        if effectiveness_test(pattern.elements, sample.elements, quiet_mode=True):
            built_in_time.append(pattern.delta_time)
            comparison_time.append(sample.delta_time)
        else:
            errors += 1
    
    
    return (errors, built_in_time, comparison_time)



In [8]:

    
def comparison_report(pattern, comparison):
    pattern_summary = stats.describe(pattern)
    comparison_summary = stats.describe(comparison)

    print('Built in average time:    {:16.8f}\n'
          'Comparison average time:  {:16.8f}\n'
          'Relative mean efficience: {:16.8f}'.
          format(
                pattern_summary.mean,
                comparison_summary.mean,
                comparison_summary.mean / pattern_summary.mean
           )
    )

    print('-' * 80)

    print('Built in variance time:   {:20.12f}\n'
          'Comparison variance time: {:20.12f}\n'
          'Relative variance:        {:20.12f}'.
          format(
                pattern_summary.variance,
                comparison_summary.variance,
                comparison_summary.variance / pattern_summary.variance
           )
    )
    
    return (pattern_summary.mean, comparison_summary.mean)



In [9]:

    
def plot_pattern_time(pattern_time, test_size, sample_size):
    plt.xlabel("{} runs of sample Size: {}".format(test_size, sample_size))
    plt.ylabel("$Elapsed \quad  Time$")
    plt.title("Built in Sort")
    plt.plot(
        pattern_time, "b{}-".format("o" if test_size < 25 else "")
    )
    maximal = max(pattern_time)
    minimal = min(pattern_time)
    plt.plot(
        [maximal] * test_size, "{}-".format("o" if test_size < 25 else ""),
        label="Max: {:.6f}".format(maximal)
    )
    plt.plot(
        [minimal] * test_size, "{}-".format("o" if test_size < 25 else ""),
        label="Min: {:.6f}".format(minimal)
    )
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.axis([-.1, test_size - .9, 0, maximal * 1.5])
    plt.grid(True)



In [10]:

    
def plot_cmp_time(cmp_time, test_size, sample_size):    
    plt.xlabel("{} runs of sample Size: {}".format(test_size, sample_size))
    plt.ylabel("$Elapsed \quad  Time$")
    plt.title("Bubble sort")
    plt.plot(
        cmp_time, "r{}-".format("o" if test_size < 25 else ""),
    )
    maximal, minimal = max(cmp_time), min(cmp_time)
    plt.plot(
        [maximal] * test_size, "{}-".format("o" if test_size < 25 else ""),
        label="Max: {:.6f}".format(maximal)
    )
    plt.plot(
        [minimal] * test_size, "{}-".format("o" if test_size < 25 else ""),
        label="Min: {:.6f}".format(minimal)
    )
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.axis([-.1, test_size - .9, 0, maximal * 1.5])
    plt.grid(True)



In [11]:

    
def plot_both_times(pattern_time, cmp_time, test_size, sample_size):
    plt.xlabel("{} runs of sample Size: {}".format(test_size, sample_size))
    plt.ylabel("$Elapsed \quad  Time$")
    plt.title("Relative performance")
    plt.plot(
        pattern_time, "b{}-".format("o" if test_size < 25 else ""), label="Built in Sort"
    )
    plt.plot(
        cmp_time, "r{}-".format("o" if test_size < 25 else ""), label="Bubble Sort"
    )
    plt.legend(loc="upper right")
    maximal = max(cmp_time + pattern_time)
    plt.axis([-.1, test_size - .9, 0, maximal * 1.5])
    plt.grid(True)



In [12]:

    
def plot_quotient_relation(pattern_timem, cmp_time, test_size, sample_size):
    plt.xlabel("{} runs of sample Size: {}".format(test_size, sample_size))
    plt.ylabel("$Relative \quad Elapsed \quad  Time$")
    plt.title("Bubble sort / Built in Sort")
    ratios = [bubble / cmp for bubble, cmp in zip(cmp_time, pattern_time)]
    plt.plot(ratios, "g{}-".format("o" if test_size < 25 else ""))

    maximal = max(ratios)
    minimal = min(ratios)
    plt.plot(
        [maximal] * test_size, "{}-".format("o" if test_size < 25 else ""),
        label="Max: {:.6f}".format(maximal)
    )
    plt.plot(
        [minimal] * test_size, "{}-".format("o" if test_size < 25 else ""),
        label="Min: {:.6f}".format(minimal)
    )
    plt.plot(
        [1] * test_size, "{}-".format("o" if test_size < 25 else ""), label="1"
    )
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    plt.axis([-.1, test_size - .9, 0, maximal * 1.5])
    plt.grid(True)



In [13]:

    
def plot_graphics(pattern_time, cmp_time, test_size, sample_size):
    # Built in sort method
    plt.subplot(221)
    plot_pattern_time(pattern_time, test_size, sample_size)

    # Bubble sort
    plt.subplot(222)
    plot_cmp_time(cmp_time, test_size, sample_size)

    # Both Bubble and Built in sort
    plt.subplot(223)
    plot_both_times(pattern_time, cmp_time, test_size, sample_size)

    # Ratio between Bubble Sort / Built in Sort
    plt.subplot(224)
    plot_quotient_relation(pattern_time, cmp_time, test_size, sample_size)

    plt.subplots_adjust(top=1.5, bottom=0, left=0.10, right=1.95, hspace=.5, wspace=.75)
    plt.show()



In [14]:

    
def plot_pattern_growth(suite_cases, pattern_time, suite_size):
    plt.xlabel("Test suite size".format(len(growth_behaviour)))
    plt.ylabel("$Average \quad Elapsed \quad  Time$")
    plt.title("Average performance per suite")
    
    plt.plot(
        suite_cases, pattern_time, "b{}-".
        format("o" if suite_size < 25 else ""), label="Built in Sort"
    )
    
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.grid(True)



In [15]:

    
def plot_cmp_growth(suite_cases, cmp_time, suite_size):
    plt.xlabel("Test suite size".format(len(growth_behaviour)))
    plt.ylabel("$Average \quad Elapsed \quad  Time$")
    plt.title("Average performance per suite")
    
    plt.plot(
        suite_cases, cmp_time, "r{}-".
        format("o" if suite_size < 25 else ""), label="Bubble Sort"
    )
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.grid(True)



In [16]:

    
def plot_both_growth_times(suite_cases, pattern_time, cmp_time, suite_size):
    plt.xlabel("Test suite size".format(len(growth_behaviour)))
    plt.ylabel("$Average \quad Elapsed \quad  Time$")
    plt.title("Average performance per suite")
    
    plt.plot(
        suite_cases, cmp_time, "r{}-".
        format("o" if suite_size < 25 else ""), label="Bubble Sort"
    )
    plt.plot(
        suite_cases, pattern_time, "b{}-".
        format("o" if suite_size < 25 else ""), label="Built in Sort"
    )
    
    plt.legend(loc="upper left")
    plt.grid(True)



In [17]:

    
def plot_growth_graphics(growth_behaviour):
    suite_size = len(growth_behaviour)
    suite_cases = [entry.sample_size for entry in growth_behaviour]
    pattern_time = [entry.pattern_average for entry in growth_behaviour]
    cmp_time = [entry.cmp_average for entry in growth_behaviour]
    
    # Built in sort method
    plt.subplot(221)
    plot_pattern_growth(suite_cases, pattern_time, suite_size)

    # Bubble sort
    plt.subplot(222)
    plot_cmp_growth(suite_cases, cmp_time, suite_size)

    # Both Bubble and Built in sort
    plt.subplot(223)
    plot_both_growth_times(suite_cases, pattern_time, cmp_time, suite_size)

    plt.subplots_adjust(top=1.5, bottom=0, left=0.10, right=1.95, hspace=.5, wspace=.75)
    plt.show()



In [18]:

    
AverageTime = namedtuple('AverageTime', 'pattern_average, cmp_average, sample_size')
growth_behaviour = []

Running 2048 times a sample with 1 element



In [19]:

    
sample_size = 1
test_size = 2048



In [20]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 1



In [21]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00000951
Comparison average time:        0.00000531
Relative mean efficience:       0.55812288
--------------------------------------------------------------------------------
Built in variance time:         0.000000000030
Comparison variance time:       0.000000000003
Relative variance:              0.115763944476



In [22]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 2 elements



In [23]:

    
sample_size = 2
test_size = 2048



In [24]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 2



In [25]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00000853
Comparison average time:        0.00001346
Relative mean efficience:       1.57727065
--------------------------------------------------------------------------------
Built in variance time:         0.000000000007
Comparison variance time:       0.000000000068
Relative variance:             10.225387472706



In [26]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 4 elements



In [27]:

    
sample_size = 4
test_size = 2048



In [28]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 4



In [29]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00000907
Comparison average time:        0.00001986
Relative mean efficience:       2.18987457
--------------------------------------------------------------------------------
Built in variance time:         0.000000000018
Comparison variance time:       0.000000000068
Relative variance:              3.765173578284



In [30]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 8 elements



In [31]:

    
sample_size = 8
test_size = 2048



In [32]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 8



In [33]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00000959
Comparison average time:        0.00004230
Relative mean efficience:       4.41138337
--------------------------------------------------------------------------------
Built in variance time:         0.000000000010
Comparison variance time:       0.000000000116
Relative variance:             12.050164637311



In [34]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 16 elements



In [35]:

    
sample_size = 16
test_size = 2048



In [36]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 16



In [37]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00002190
Comparison average time:        0.00016467
Relative mean efficience:       7.52087133
--------------------------------------------------------------------------------
Built in variance time:         0.000000000126
Comparison variance time:       0.000000003390
Relative variance:             26.948900780530



In [38]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 32 elements



In [39]:

    
sample_size = 32
test_size = 2048



In [40]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 32



In [41]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00002306
Comparison average time:        0.00053421
Relative mean efficience:      23.16634902
--------------------------------------------------------------------------------
Built in variance time:         0.000000000077
Comparison variance time:       0.000000014003
Relative variance:            181.066489856340



In [42]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 64 elements



In [43]:

    
sample_size = 64
test_size = 2048



In [44]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 64



In [45]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00004267
Comparison average time:        0.00210524
Relative mean efficience:      49.34108239
--------------------------------------------------------------------------------
Built in variance time:         0.000000000087
Comparison variance time:       0.000000158492
Relative variance:           1823.793555570494



In [46]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 128 elements



In [47]:

    
sample_size = 128
test_size = 2048



In [48]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 128



In [49]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00005896
Comparison average time:        0.00749469
Relative mean efficience:     127.11862077
--------------------------------------------------------------------------------
Built in variance time:         0.000000000070
Comparison variance time:       0.000001066910
Relative variance:          15313.202070742189



In [50]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 256 elements



In [51]:

    
sample_size = 256
test_size = 2048



In [52]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 256



In [53]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00014472
Comparison average time:        0.03598095
Relative mean efficience:     248.62436814
--------------------------------------------------------------------------------
Built in variance time:         0.000000000579
Comparison variance time:       0.000031299492
Relative variance:          54043.124630380749



In [54]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 512 elements



In [55]:

    
sample_size = 512
test_size = 2048



In [56]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 512



In [57]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00036892
Comparison average time:        0.18027627
Relative mean efficience:     488.65617516
--------------------------------------------------------------------------------
Built in variance time:         0.000000007889
Comparison variance time:       0.001902137979
Relative variance:         241105.701796587207



In [58]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 1024 elements



In [59]:

    
sample_size = 1024
test_size = 2048



In [60]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 1024



In [61]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00079435
Comparison average time:        0.72281628
Relative mean efficience:     909.94606648
--------------------------------------------------------------------------------
Built in variance time:         0.000000022398
Comparison variance time:       0.016303729409
Relative variance:         727909.241680937936



In [62]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 2048 elements



In [63]:

    
sample_size = 2048
test_size = 2048



In [64]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 2048



In [65]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00164164
Comparison average time:        2.73741568
Relative mean efficience:    1667.48424610
--------------------------------------------------------------------------------
Built in variance time:         0.000000044823
Comparison variance time:       0.050597301494
Relative variance:        1128827.251554310322



In [66]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)

Running 2048 times a sample with 4096 elements



In [67]:

    
sample_size = 4096
test_size = 2048



In [68]:

    
errors, pattern_time, cmp_time = run_tests(
    bubble_sort, clf_function, sample_size=sample_size, test_size=test_size)

print("In the end of the day {} error{} occurred in {} runs of size {}".
      format(errors, "s" if errors > 1 else "", test_size, sample_size))









    



In the end of the day 0 error occurred in 2048 runs of size 4096



In [69]:

    
average_pattern_time, average_cmp_time = comparison_report(pattern_time, cmp_time)
growth_behaviour.append(AverageTime(average_pattern_time, average_cmp_time, sample_size))









    



Built in average time:          0.00371342
Comparison average time:       11.55913126
Relative mean efficience:    3112.79672465
--------------------------------------------------------------------------------
Built in variance time:         0.000000143734
Comparison variance time:       0.286423356013
Relative variance:        1992725.883178265765



In [70]:

    
plot_graphics(pattern_time, cmp_time, test_size, sample_size)



In [71]:

    
plot_growth_graphics(growth_behaviour)